In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
In [6]:
# Load the content export, using the 'Published' column as a parsed
# datetime index so the frame can be resampled by day further down.
df = pd.read_csv(
    'All content.csv',
    index_col='Published',
    parse_dates=True,
)
# Unit counter per row: summing it per day gives the number of rows that day.
df = df.assign(count=1)
# Keep only rows with more than 200 page views.
df = df.loc[df['Page Views'] > 200]
In [7]:
# Aggregate to one row per calendar day by summing the numeric columns, so
# 'count' becomes the number of rows published on each day.
#
# The `how=` keyword of resample() was deprecated in pandas 0.18 and has since
# been removed; the aggregation is now a method call on the resampler.
#   * numeric_only=True keeps text columns (e.g. 'Url') out of the sum.
#   * min_count=1 leaves days without any rows as NaN rather than 0, so the
#     dropna() applied to the truncated frame still discards empty days and
#     'Page Views' // 'count' never divides by zero.
df_resampled = df.resample('D').sum(numeric_only=True, min_count=1)
In [8]:
# Restrict the daily series to the analysis window; truncate() keeps both
# boundary dates.
df_trunc = df_resampled.truncate(
    before='2015-06-01',
    after='2016-03-04',
)
In [9]:
# Discard every day (row) that has a missing value in any column.
df_trunc = df_trunc.dropna(axis=0, how='any')
In [10]:
# Narrow the daily frame to the engagement metrics plus the per-day row count.
df_trunc = df_trunc[[
    'Page Views',
    'Social Actions',
    'Social Referrals',
    'Facebook Shares',
    'count',
]]
# Page views per row for each day. Note this is a floor division, so the
# "mean" is rounded down to a whole number.
df_trunc['mean'] = df_trunc['Page Views'].floordiv(df_trunc['count'])
In [11]:
# Scatter each daily metric against the number of rows counted for that day.
df_trunc.plot.scatter(x='count', y='Page Views', title='PVs total')
df_trunc.plot.scatter(x='count', y='mean', title='Average PVs')
df_trunc.plot.scatter(x='count', y='Facebook Shares', title='Total Facebook Shares')
Out[11]:
In [12]:
# Reload the full export (no page-view filter applied here) and split it by
# URL section.
df2 = pd.read_csv('All content.csv', index_col='Published', parse_dates=True)

# Rows whose 'Url' is missing are treated as non-matching (na=False).
# assign() returns a new, independent frame with the unit 'count' column.
# Setting the column on the boolean-filtered slice instead would be a chained
# assignment and raise pandas' SettingWithCopyWarning.
df_articles = df2[df2['Url'].str.contains('/articles/', na=False)].assign(count=1)
df_places = df2[df2['Url'].str.contains('/places/', na=False)].assign(count=1)
In [13]:
# Daily totals for article rows.
# resample(..., how='sum') relied on the `how=` keyword, which was deprecated
# in pandas 0.18 and has since been removed; call .sum() on the resampler.
#   * numeric_only=True keeps text columns (e.g. 'Url') out of the sum.
#   * min_count=1 leaves days without any rows as NaN rather than 0, so the
#     dropna() below removes them and the division never sees count == 0.
df_articles_resampled = df_articles.resample('D').sum(numeric_only=True, min_count=1)

# Restrict to the analysis window (both boundary dates are kept), then drop
# days with a missing value in any column.
df_articles_trunc = df_articles_resampled.truncate(before='2015-06-01', after='2016-03-04')
df_articles_trunc = df_articles_trunc.dropna()

# Keep the engagement metrics and the per-day row count. assign() builds a
# new frame, so the derived column is not written onto a slice.
# 'mean' is a floor division: page views per row, rounded down.
df_articles_trunc = df_articles_trunc[
    ['Page Views', 'Social Actions', 'Social Referrals', 'Facebook Shares', 'count']
].assign(mean=lambda daily: daily['Page Views'] // daily['count'])
In [14]:
# Scatter each daily article metric against the number of articles that day.
# The Facebook-shares title now carries the 'Articles' prefix like its two
# siblings; previously it was identical to the all-content and places figures,
# so the three charts could not be told apart.
df_articles_trunc.plot(kind='scatter', x='count', y='Page Views', title='Articles PVs total')
df_articles_trunc.plot(kind='scatter', x='count', y='mean', title='Articles Average PVs')
df_articles_trunc.plot(kind='scatter', x='count', y='Facebook Shares', title='Articles Total Facebook Shares')
Out[14]:
In [15]:
# Daily totals for place rows.
# resample(..., how='sum') relied on the `how=` keyword, which was deprecated
# in pandas 0.18 and has since been removed; call .sum() on the resampler.
#   * numeric_only=True keeps text columns (e.g. 'Url') out of the sum.
#   * min_count=1 leaves days without any rows as NaN rather than 0, so the
#     dropna() below removes them and the division never sees count == 0.
df_places_resampled = df_places.resample('D').sum(numeric_only=True, min_count=1)

# Restrict to the analysis window (both boundary dates are kept), then drop
# days with a missing value in any column.
df_places_trunc = df_places_resampled.truncate(before='2015-06-01', after='2016-03-04')
df_places_trunc = df_places_trunc.dropna()

# Keep the engagement metrics and the per-day row count. assign() builds a
# new frame, so the derived column is not written onto a slice.
# 'mean' is a floor division: page views per row, rounded down.
df_places_trunc = df_places_trunc[
    ['Page Views', 'Social Actions', 'Social Referrals', 'Facebook Shares', 'count']
].assign(mean=lambda daily: daily['Page Views'] // daily['count'])
In [16]:
# Scatter each daily places metric against the number of place pages that day.
# The Facebook-shares title now carries the 'Places' prefix like its two
# siblings; previously it was identical to the all-content and articles
# figures, so the three charts could not be told apart.
df_places_trunc.plot(kind='scatter', x='count', y='Page Views', title='Places PVs total')
df_places_trunc.plot(kind='scatter', x='count', y='mean', title='Places Average PVs')
df_places_trunc.plot(kind='scatter', x='count', y='Facebook Shares', title='Places Total Facebook Shares')
Out[16]:
In [17]:
# Daily medians for article rows.
# resample(..., how='median') relied on the `how=` keyword, which was
# deprecated in pandas 0.18 and has since been removed; call .median() on the
# resampler. numeric_only=True keeps text columns (e.g. 'Url') out of it.
df_articles_resampled2 = df_articles.resample('D').median(numeric_only=True)

# Restrict to the analysis window (both boundary dates are kept), then drop
# days with a missing value in any column (days without rows have NaN medians).
df_articles_trunc2 = df_articles_resampled2.truncate(before='2015-06-01', after='2016-03-04')
df_articles_trunc2 = df_articles_trunc2.dropna()

# Keep the engagement metrics and attach the per-day article count taken from
# the summed frame. assign() aligns the Series on the date index and builds a
# new frame, so nothing is written onto a slice; dates missing from
# df_articles_trunc end up with NaN in 'count'.
df_articles_trunc2 = df_articles_trunc2[
    ['Page Views', 'Social Actions', 'Social Referrals', 'Facebook Shares']
].assign(count=df_articles_trunc['count'])
In [18]:
# Median page views per article against the number of articles that day.
df_articles_trunc2.plot.scatter(
    x='count',
    y='Page Views',
    title='Median Articles PVs',
)
Out[18]:
In [19]:
# Daily medians for place rows.
# resample(..., how='median') relied on the `how=` keyword, which was
# deprecated in pandas 0.18 and has since been removed; call .median() on the
# resampler. numeric_only=True keeps text columns (e.g. 'Url') out of it.
df_places_resampled2 = df_places.resample('D').median(numeric_only=True)

# Restrict to the analysis window (both boundary dates are kept), then drop
# days with a missing value in any column (days without rows have NaN medians).
df_places_trunc2 = df_places_resampled2.truncate(before='2015-06-01', after='2016-03-04')
df_places_trunc2 = df_places_trunc2.dropna()

# Keep the engagement metrics and attach the per-day place count taken from
# the summed frame. assign() aligns the Series on the date index and builds a
# new frame, so nothing is written onto a slice; dates missing from
# df_places_trunc end up with NaN in 'count'.
df_places_trunc2 = df_places_trunc2[
    ['Page Views', 'Social Actions', 'Social Referrals', 'Facebook Shares']
].assign(count=df_places_trunc['count'])

# Median page views per place page against the number of place pages that day.
df_places_trunc2.plot(kind='scatter', x='count', y='Page Views', title='Median Places PVs')
Out[19]:
import statsmodels.formula.api as smf
In [22]:
import statsmodels.formula.api as smf
# Display the daily all-content frame as the cell output, to inspect the data
# passed to the regression cell below.
df_trunc
Out[22]:
In [30]:
# Ordinary least squares: daily page views explained by the number of rows
# published that day.
# The column name 'Page Views' contains a space, which the formula parser
# cannot read as a bare identifier ("Page Views ~ count" fails to parse).
# Patsy's Q("...") helper quotes such names so the column is looked up verbatim.
lm = smf.ols(formula='Q("Page Views") ~ count', data=df_trunc).fit()
# Leave the regression summary as the cell's displayed output.
lm.summary()
In [ ]: